* Start from an empty session and turn off output paging.
* NOTE(review): -set mem- only has an effect in older Stata releases; newer
* releases manage memory automatically -- confirm the target version.
clear all
set more off
set mem 200m

******************** merge data sets ********************
* Every merge in this section uses the old-style syntax
*     merge <keys> using <file>
* and is followed by the same three steps: tabulate _merge, rename _merge to
* a file-specific merge_* indicator, and re-sort the data on folio ls.
* NOTE(review): the old syntax expects the using files to be sorted on the
* merge keys already -- confirm for each file.

* starting file, sorted on folio and ls
use c_ls, clear
sort folio ls 

* merged on folio only
merge folio using c_portad
tab _merge
rename _merge merge_portad
sort folio ls

*** household assets
merge folio using ii_ah
tab _merge
rename _merge merge_hhah
* rename every variable whose name begins with "ah" to begin with "hah";
* later code reads these as hah03*/hah04*, while a second set of ah*
* variables arrives with the iiia_ah merge further down
renpfix ah hah
sort folio ls 

*** household credit
merge folio using ii_crh
tab _merge
rename _merge merge_crh
sort folio ls 

*** household income
merge folio using ii_in
tab _merge
rename _merge merge_in
sort folio ls 

*** education
* first merge on both folio and ls; rows found only in iiia_ed
* (_merge==2) are dropped -- this is the only merge that drops them
merge folio ls using iiia_ed
tab _merge
drop if _merge==2
rename _merge merge_ed
sort folio ls 

*** individual-level modules, all merged on folio ls
* The k-th entry of `files' is merged and its match indicator is stored as
* merge_<k-th entry of `tags'>.  Files, in merge order:
*   iiia_tb              labor outcomes
*   iiia_ah              individual assets
*   iiia_iin             non-labor income
*   mg2002, iiia_mg      permanent migration
*   mt2002, iiia_mt      temporary migration
*   iiia_ata             timeuse
*   iiia_shi             shocks
*   iiib_es              health
*   iiib_sm              mental health
*   iiib_gh              tastes
*   s_sa                 anthropometrics
*   iiib_re, iiib_re1_   relatives
*   iiib_cr              credit
*   ea_eca               cognitive ability
*   hh02w_b3a, hh02w_b3b weights
local files iiia_tb iiia_ah iiia_iin mg2002 iiia_mg mt2002 iiia_mt iiia_ata iiia_shi iiib_es iiib_sm iiib_gh s_sa iiib_re iiib_re1_ iiib_cr ea_eca hh02w_b3a hh02w_b3b
local tags tb ah iin mg2 mg1 mt2 mt1 ata shi es sm gh sa re re1 cr iq weightsa weightsb

local nfiles : word count `files'
forvalues k = 1/`nfiles' {
    local file : word `k' of `files'
    local tag : word `k' of `tags'

    merge folio ls using `file'
    tab _merge
    rename _merge merge_`tag'
    sort folio ls
}

*** household-level weights: merged on folio only
sort folio
merge folio using hh02w_bc
tab _merge
rename _merge merge_weightsc
sort folio ls 

*** string person id: folio zero-padded to 8 digits followed by ls
*** zero-padded to 2 digits (var1 and var2 are kept in the data)
gen str8 var1 = string(folio, "%08.0f")
gen str2 var2 = string(ls, "%02.0f")
gen pid_link = var1 + var2
sort pid_link

*** files keyed on pid_link; the merge indicator is named after the file
foreach file in migus mg2005 {
    merge pid_link using `file'
    tab _merge
    rename _merge merge_`file'
    sort pid_link
}

* last pid_link merge: no re-sort afterwards
merge pid_link using mt2005
tab _merge
rename _merge merge_mt2005

******************** rename and recode variables *******************

*****************
*** migration ***
*****************

* migrated to US and return by 2005
* mig2005 is 1 when mig2005_p or mig2005_t equals 1 and 0 in every other
* case, including when both source variables are missing
gen byte mig2005=(mig2005_p==1 | mig2005_t==1)

* gen measure of permanent migrants
* snapshot of migus taken before it is overwritten below
gen migus_perm=migus

* combine return migrants w/ stayers
* sets migus to 1 for everyone flagged by mig2005
* NOTE(review): "stayers" presumably means migrants who remained in the US
* (migus_perm==1) -- confirm against the definition of migus in the migus file
replace migus=1 if mig2005==1


********************
*** demographics ***
********************

* geography: plain copies plus urban/rural dummies built from estrato
* (urban = estrato 1-3, rural = estrato 4; both missing when estrato is missing)
gen state=edo
gen muni=mpio
gen urban=inlist(estrato,1,2,3) if estrato!=.
gen rural=(estrato==4) if estrato!=.
gen locid=id_loc

* person characteristics copied from the ls* variables
gen age=ls02_2 
gen age2=age*age
gen sex=ls04 
gen marst=ls10
gen lives_in_hh=(ls09==1) if ls09!=.

* dummies: male is sex code 1, female is sex code 3, married is marst 1 or 5
gen byte male=(sex==1) if sex!=.
gen byte female=(sex==3) if sex!=.
gen byte married=inlist(marst,1,5) if marst!=.

***********************
*** relatives in US ***
***********************

* any relative in the US (person level and household maximum)
gen byte relativeUS=(re01==1) if re01!=.
egen relativeUS_hh=max(relativeUS), by(folio)

* number of relatives, forced to zero when the person reports none
rename numrelUS nrelativeUS
replace nrelativeUS=0 if relativeUS==0

* relationship dummies: 1 when any of relUS1-relUS4 carries the listed
* code(s); defined only where relativeUS is non-missing
gen byte spouseUS=inlist(1, relUS1, relUS2, relUS3, relUS4) if relativeUS!=.
gen byte parentUS=(inlist(relUS1,2,3) | inlist(relUS2,2,3) | inlist(relUS3,2,3) | inlist(relUS4,2,3)) if relativeUS!=.
gen byte siblingUS=inlist(4, relUS1, relUS2, relUS3, relUS4) if relativeUS!=.
gen byte childUS=inlist(5, relUS1, relUS2, relUS3, relUS4) if relativeUS!=.
gen byte cousinUS=inlist(10, relUS1, relUS2, relUS3, relUS4) if relativeUS!=.
gen byte uncleUS=inlist(11, relUS1, relUS2, relUS3, relUS4) if relativeUS!=.
gen byte nephewUS=inlist(12, relUS1, relUS2, relUS3, relUS4) if relativeUS!=.
* "other" covers codes 6-9 and 13
gen byte otherUS=((inrange(relUS1,6,9) | relUS1==13) | (inrange(relUS2,6,9) | relUS2==13) | (inrange(relUS3,6,9) | relUS3==13) | (inrange(relUS4,6,9) | relUS4==13)) if relativeUS!=.

* grouped dummies
* NOTE(review): unlike the dummies above these carry no "if relativeUS!=."
* qualifier, so they are 0 (not missing) when relativeUS is missing --
* confirm this is intended
gen byte cousin_uncle_nephewUS=(cousinUS==1 | uncleUS==1 | nephewUS==1)
gen byte relativeUS_same=(cousinUS==1 | siblingUS==1 | spouseUS==1)
gen byte relativeUS_older=(parentUS==1 | uncleUS==1)
gen byte relativeUS_young=(childUS==1 | nephewUS==1)


***********************
*** health outcomes ***
***********************

* body mass index from sa07_2 (divided by 100) and sa09_2
gen height=sa07_2/100
gen weight=sa09_2
gen bmi=weight/(height^2)
gen byte overweight=(bmi>=25) if bmi!=.

* hemoglobin: sa17_2 times 10, flagged low below 130
gen hemogl=sa17_2*10
gen byte lowhemo=(hemogl<130) if hemogl!=.

* blood pressure: flagged when systole exceeds 120 or diastole exceeds 80,
* defined only when both readings are present
gen systole=sa16_21
gen diastole=sa16_22
gen byte hbp=(systole>120 | diastole>80) if systole!=. & diastole!=.

* self-reported health (es01) and the es16 counterpart: codes 1 and 2 count as good
gen int health=es01
gen goodhealth=inlist(health,1,2) if health!=.
gen int health_rel=es16
gen goodhealth_rel=inlist(health_rel,1,2) if health_rel!=.

gen byte smoke=(gh06==1) if gh06!=.

* mental health score: row sum over sm01-sm21; a sum of zero is set to missing
egen mentalhealth=rowtotal(sm01-sm21)
replace mentalhealth=. if mentalhealth==0

***********************
*** time allocation ***
***********************

* One dummy per activity item ata01a ... ata01j: 1 if the item equals 1,
* 0 for any other non-missing answer, missing if the item is missing.
* Activities are listed in item order (a, b, c, ...).
local activities social tv read cooked washed care homework internet wood water
local pos = 0
foreach act of local activities {
    local ++pos
    local item : word `pos' of `c(alpha)'
    gen byte `act'=(ata01`item'==1) if ata01`item'!=.
}


***************************
*** progresa & procampo ***
***************************

* 1 if the source item equals 1, 0 for other non-missing answers
* NOTE(review): the earlier version tested "==1" twice with "|" in each
* expression; the second operand may have been meant to be another code -- confirm

* individual-level items (iin*)
gen byte progresa_ind=(iin01a1_1==1) if iin01a1_1!=.
gen byte procampo_ind=(iin01a2_1==1) if iin01a2_1!=.

* household-level items (in*)
gen byte progresa=(in01a1_1==1) if in01a1_1!=.
gen byte procampo=(in01a2_1==1) if in01a2_1!=.


**********************
*** past migration ***
**********************

* ever moved, from mg08 and from mt01, and the row maximum of the two
gen byte moved_mg=(mg08==1) if mg08!=.
gen byte moved_mt=(mt01==1) if mt01!=.
egen moved=rowmax(moved_mt moved_mg) if mg08!=. | mt01!=.

* number of moves: rename the two counters, then add them up
foreach module in mt mg {
    rename `module'_nmoves nmoves_`module'
}
egen nmoves=rowtotal(nmoves_mt nmoves_mg) if mg08!=. | mt01!=.

* documents: mg31_1 in 1-3, or mt20_1 equal to 1
gen byte docUS=(inlist(mg31_1,1,2,3) | mt20_1==1) if moved!=.

* two US-visit measures; visitUS uses the question-based one
gen byte visitUS_question=(mg30==1 | mt19==1) if moved!=.
gen byte visitUS_country=inlist(1, mg17p, mt05p, mt06p) if moved!=.
gen visitUS=visitUS_question

* thoughts about moving (all defined where mg34 is non-missing)
gen byte thought_move=(mg34==1) if mg34!=.
gen byte thought_US=(mg35p_2=="UNITED STATES") if mg34!=.
gen byte thought_abroad=(mg35p_2!="") if mg34!=.


**********************
****** score iq ******
**********************

* Answer key for items eca01-eca12, in item order.  cor01-cor12 flag a
* correct answer (a missing answer scores 0).
local key 8 4 5 1 2 5 6 3 7 8 7 6
forvalues q = 1/12 {
    local item : display %02.0f `q'
    local right : word `q' of `key'
    gen byte cor`item'=(eca`item'==`right')
}

* iq = number of correct answers; missing when all 12 eca* items are missing
egen int iq=rowtotal(cor*)
egen int iq_miss=rowmiss(eca*)
replace iq=. if iq_miss==12


**************
*** assets ***
**************

* The 14 asset categories are lettered a-n in the source variables.
local goods house property bicycle autos electronic washdry kitchapps bankacc tractor cows horses pigs foul other
local letters a b c d e f g h i j k l m n

*** household level: ownership dummies from hah03*, values from hah04*_2
local h_own
local h_val
forvalues k = 1/14 {
    local good : word `k' of `goods'
    local ltr : word `k' of `letters'
    gen own_h_`good'=(hah03`ltr'==1) if hah03`ltr'!=.
    local h_own `h_own' own_h_`good'
    local h_val `h_val' hah04`ltr'_2
}

* values from 10,000,000 to 900,000,000 are set to missing before summing;
* the total is missing when all 14 components are missing
recode `h_val' (10000000/900000000=.)
egen assets_h=rowtotal(`h_val')
egen assets_h_miss=rowmiss(`h_val')
replace assets_h=. if assets_h_miss==14
replace assets_h=. if assets_h>10000000

* component scores from a principal-component analysis of the ownership dummies
pca `h_own'
predict assets_pc1, score

*** individual level: ownership dummies from ah04*, values from ah05*_2
local i_val
forvalues k = 1/14 {
    local good : word `k' of `goods'
    local ltr : word `k' of `letters'
    gen own_`good'=(ah04`ltr'==1) if ah04`ltr'!=.
    local i_val `i_val' ah05`ltr'_2
}

recode `i_val' (10000000/900000000=.)
egen assets_i=rowtotal(`i_val')
egen assets_i_miss=rowmiss(`i_val')
replace assets_i=. if assets_i_miss==14

* combined measure: household total, overridden by the individual total when available
gen assets=assets_h
replace assets=assets_i if assets_i!=.
replace assets=. if assets>10000000


**************
*** shocks ***
**************

* shi01 / shi02 copied; codes 1 and 2 count as improvement
gen int lifechanged=shi01
gen int lifewillchange=shi02
gen byte lifeimproved=inlist(lifechanged,1,2) if lifechanged!=.
gen byte lifewillimprove=inlist(lifewillchange,1,2) if lifewillchange!=.

**************
*** credit ***
**************

* yes/no items: 1 if the item equals 1, 0 for other non-missing answers
local items cr04 cr06 cr08 cr27
local names tanda buycredit canborrow saved
forvalues k = 1/4 {
    local item : word `k' of `items'
    local name : word `k' of `names'
    gen byte `name'=(`item'==1) if `item'!=.
}

* savings amount: 0 for non-savers, cr28 where reported, missing above 10,000,000
gen byte savings=0 if saved==0
replace savings=cr28 if cr28!=.
replace savings=. if savings>10000000

* household saving: 0 when crh01_1a equals 1, switched to 1 when any of
* crh01_1b ... crh01_1k carries its own code (2 ... 11)
gen byte saved_hh=0 if crh01_1a==1
local code = 1
foreach ltr in b c d e f g h i j k {
    local ++code
    replace saved_hh=1 if crh01_1`ltr'==`code'
}


*****************
*** education ***
*****************
* Builds three years-of-schooling measures:
*   yearsch         from ed05-ed08, using the graduate variable for attend codes 5 and up
*   yearsch_roster  from ls14/ls15_1, using fixed rounded values for attend codes 5 and up
*   yearsch_check   from ed05-ed07, using the same fixed rounded values as the roster measure
* The replace chains below are order-dependent: later lines overwrite earlier ones.

gen literate=(ed02==1) if ed02!=.

* roster-based attendance level and grade; a missing grade is filled with 3
* for level 3 and with 2 for levels 4 and 6
* NOTE(review): presumably an assumed typical grade for the level -- confirm
gen attend_roster=ls14
gen grade_roster=ls15_1 
replace grade_roster=3 if grade_roster==. & attend_roster==3
replace grade_roster=2 if grade_roster==. & (attend_roster==4 | attend_roster==6)

* copies of the ed05-ed08 items
gen ever_attend=ed05
gen attend=ed06
gen grade=ed07
gen graduate=ed08

* ever_attend==3 is mapped to attend==1, which receives 0 years of schooling below
replace attend=1 if ever_attend==3
* codes 98 and 8 are set to missing
* NOTE(review): presumably "don't know"-type codes -- confirm against the codebook
recode attend attend_roster grade grade_roster (98=.)
recode grade grade_roster graduate (8=.)

* same fill-in of missing grades as for the roster variables, plus
* graduate=2 when missing for attend codes 5 and 7-10
replace grade=3 if grade==. & attend==3
replace grade=2 if grade==. & (attend==4 | attend==6)
replace graduate=2 if graduate==. & (attend==5 | attend==7 | attend==8 | attend==9 | attend==10)

* yearsch: attend 0-2 -> 0; attend 3 -> grade (capped at 6); attend 4 ->
* 6+grade (capped at 9); attend 6 -> 9+grade (capped at 12); attend 5 and
* 7-10 -> fixed values that depend on graduate (1 versus 2 or 3)
gen int yearsch=.
replace yearsch=0 if attend==0 | attend==1 | attend==2
replace yearsch=0+grade if attend==3
replace yearsch=6 if grade>6 & grade!=. & attend==3
replace yearsch=6+grade if attend==4 
replace yearsch=9 if grade>3 & grade!=. & attend==4 
replace yearsch=7 if (graduate==2 | graduate==3) & attend==5 
replace yearsch=9 if graduate==1 & attend==5 
replace yearsch=9+grade if attend==6
replace yearsch=12 if grade>3 & grade!=. & attend==6
replace yearsch=10 if (graduate==2 | graduate==3) & (attend==7 | attend==8)
replace yearsch=12 if graduate==1 & (attend==7 | attend==8)
replace yearsch=14 if (graduate==2 | graduate==3) & attend==9 
replace yearsch=16 if graduate==1 & attend==9
replace yearsch=17 if (graduate==2 | graduate==3) & attend==10 
replace yearsch=18 if graduate==1 & attend==10

* yearsch_roster: same rules for attend codes 0-4 and 6; for codes 5 and
* 7-10 a single rounded value is used instead of the graduate variable.
* The round() expressions evaluate to 7, 10, 10, 14 and 17 respectively.
* NOTE(review): the multipliers (.7253, 0.4118, ...) are presumably
* completion shares estimated elsewhere -- confirm their source
gen int yearsch_roster=.
replace yearsch_roster=0 if attend_roster==0 | attend_roster==1 | attend_roster==2
replace yearsch_roster=0+grade_roster if attend_roster==3
replace yearsch_roster=6 if grade_roster>6 & grade_roster!=. & attend_roster==3
replace yearsch_roster=6+grade_roster if attend_roster==4 
replace yearsch_roster=9 if grade_roster>3 & grade_roster!=. & attend_roster==4 
replace yearsch_roster=round(6+((9-7)*.7253)) if attend_roster==5 
replace yearsch_roster=9+grade_roster if attend_roster==6
replace yearsch_roster=12 if grade_roster>3 & grade_roster!=. & attend_roster==6
replace yearsch_roster=round(9+((12-10)*0.4118)) if attend_roster==7 
replace yearsch_roster=round(9+((12-10)*0.7469)) if attend_roster==8 
replace yearsch_roster=round(12+((16-12)*0.4595)) if attend_roster==9 
replace yearsch_roster=round(16+((18-16)*0.6627)) if attend_roster==10

* yearsch_check: the roster formula applied to attend/grade
gen int yearsch_check=.
replace yearsch_check=0 if attend==0 | attend==1 | attend==2
replace yearsch_check=0+grade if attend==3
replace yearsch_check=6 if grade>6 & grade!=. & attend==3
replace yearsch_check=6+grade if attend==4 
replace yearsch_check=9 if grade>3 & grade!=. & attend==4 
replace yearsch_check=round(6+((9-7)*.7253)) if attend==5 
replace yearsch_check=9+grade if attend==6
replace yearsch_check=12 if grade>3 & grade!=. & attend==6
replace yearsch_check=round(9+((12-10)*0.4118)) if attend==7 
replace yearsch_check=round(9+((12-10)*0.7469)) if attend==8 
replace yearsch_check=round(12+((16-12)*0.4595)) if attend==9 
replace yearsch_check=round(16+((18-16)*0.6627)) if attend==10

**********************************
*** labor market participation ***
**********************************
* Work indicators are nested: each one adds a further tb* item to the
* previous definition and is defined whenever at least one of the items it
* uses is non-missing.

* roster report
gen byte worked_roster=(ls12==1) if ls12!=.

gen empstat_main=tb02_1 if tb02_1!=.
gen worked_main=(tb02_1==1) if tb02_1!=.
gen worked_wk=(tb02_1==1 | tb03==1 | tb04==1) if tb02_1!=. | tb03!=. | tb04!=.
gen working=(tb02_1==1 | tb03==1 | tb04==1 | tb05==1) if tb02_1!=. | tb03!=. | tb04!=. | tb05!=. 

* ever worked: tb07==1 is part of the expression because the sample
* condition already admits observations on the strength of tb07 alone;
* without it, someone with tb07==1 and no other positive item would be
* coded 0 here while being coded 1 in worked_yr below
gen worked_ever=(tb02_1==1 | tb03==1 | tb04==1 | tb05==1 | tb06==1 | tb07==1) if tb02_1!=. | tb03!=. | tb04!=. | tb05!=. | tb07!=. | tb06!=.
gen worked_yr=(tb02_1==1 | tb03==1 | tb04==1 | tb05==1 | tb07==1) if tb02_1!=. | tb03!=. | tb04!=. | tb05!=. | tb07!=.

* default work indicator used downstream
gen worked=worked_yr

* job characteristics of the main job
gen occ=tb24_26p_cmo 
gen ind=tb24_26p_scian 
gen hourswk=tb27p 
gen uhrswk=tb28p 
gen classwk=tb32p 

* weeks worked: the reported count when tb29_p==1, 52 when tb29_p==2;
* values above 52 are set to missing
gen numweeks=tb29_p1 if tb29_p==1
replace numweeks=52 if tb29_p==2
replace numweeks=. if numweeks>52
gen hoursyr=numweeks*uhrswk


****************
*** earnings ***
****************
* Main-job earnings from three sources: the roster (ls13_2), the monthly
* items (tb35*, tb37p*) and the yearly items (tb36*, tb38p*).  Each total
* is a row sum that is reset to missing when all its components are missing.

gen earnings_roster=ls13_2 
*** 13 outlier observations
replace earnings_roster=. if earnings_roster>900000 & earnings_roster!=. 

*** monthly reports ***

gen income_m=tb35a_2
egen incometot_m=rowtotal(tb35aa_2 tb35ab_2 tb35ac_2 tb35ad_2 tb35ae_2 tb35af_2 tb35ag_2 tb35ah_2 tb35ai_2)
egen incometot_m_miss=rowmiss(tb35aa_2 tb35ab_2 tb35ac_2 tb35ad_2 tb35ae_2 tb35af_2 tb35ag_2 tb35ah_2 tb35ai_2)
replace incometot_m=. if incometot_m_miss==9

gen income_m_profits_gross=tb37p1_2
*** one outlier observation
recode income_m_profits_gross (1111111=.) 
gen income_m_profits_net=tb37p2_2
* NOTE(review): gross profits are preferred here with net as fallback,
* whereas the 2nd-job and last-job sections prefer net -- confirm which is intended
gen income_m_profits=income_m_profits_gross
replace income_m_profits=income_m_profits_net if income_m_profits==.

egen earnings_m=rowtotal(income_m incometot_m income_m_profits)
egen earnings_m_miss=rowmiss(income_m incometot_m income_m_profits)
replace earnings_m=. if earnings_m_miss==3

*** yearly reports ***

gen income_yr=tb36a_2
*** outlier one observation
recode income_yr (3191980=.) 
egen incometot_yr=rowtotal(tb36aa_2 tb36ab_2 tb36ac_2 tb36ad_2 tb36ae_2 tb36af_2 tb36ag_2 tb36ah_2 tb36ai_2 tb36aj_2 tb36ak_2 tb36al_2 tb36am_2)
*gen incometot_yr_nomiss=1 if tb36aa_1=="A" | tb36ab_1=="B" | tb36ac_1=="C" | tb36ad_1=="D" | tb36ae_1=="E" | tb36af_1=="F" | tb36ag_1=="G" | tb36ah_1=="H" | tb36ai_1=="I" | tb36aj_1=="J" | tb36ak_1=="K" | tb36al_1=="L" | tb36am_1=="M" 
egen incometot_yr_miss=rowmiss(tb36aa_2 tb36ab_2 tb36ac_2 tb36ad_2 tb36ae_2 tb36af_2 tb36ag_2 tb36ah_2 tb36ai_2 tb36aj_2 tb36ak_2 tb36al_2 tb36am_2)
replace incometot_yr=. if incometot_yr_miss==13

* yearly profits come from the tb38p* items, mirroring the 2nd-job section
* (tb37s* monthly, tb38s* yearly); reading tb37p* here would add the
* monthly profit figure to the yearly total
* NOTE(review): tb38p1_2 / tb38p2_2 are inferred from the tb38s* naming --
* confirm the variable names against the questionnaire
gen income_yr_profits_gross=tb38p1_2
* NOTE(review): the 1111111 recode was carried over from the monthly item;
* check whether the yearly item has the same placeholder value
recode income_yr_profits_gross (1111111=.) 
gen income_yr_profits_net=tb38p2_2
gen income_yr_profits=income_yr_profits_gross
replace income_yr_profits=income_yr_profits_net if income_yr_profits==.

egen earnings_yr=rowtotal(income_yr incometot_yr income_yr_profits)
egen earnings_yr_miss=rowmiss(income_yr incometot_yr income_yr_profits)
replace earnings_yr=. if earnings_yr_miss==3


*** calculate log earnings ***
* the monthly component logs are taken of 12 times the monthly amount;
* ln_earnings_m is the log of the monthly total itself

gen ln_income_yr=ln(income_yr)
gen ln_incometot_yr=ln(incometot_yr)
gen ln_income_yr_profits_net=ln(income_yr_profits_net)
gen ln_income_yr_profits_gross=ln(income_yr_profits_gross)

gen ln_income_m=ln(income_m*12)
gen ln_incometot_m=ln(incometot_m*12)
gen ln_income_m_profits_net=ln(income_m_profits_net*12)
gen ln_income_m_profits_gross=ln(income_m_profits_gross*12)

gen ln_earnings_roster=ln(earnings_roster)
gen ln_earnings_m=ln(earnings_m)
gen ln_earnings_yr=ln(earnings_yr)

*twoway kdensity ln_incometot_yr || kdensity ln_income_yr || kdensity ln_income_yr_profits_gross || kdensity ln_income_yr_profits_net
*twoway kdensity ln_incometot_m || kdensity ln_income_m || kdensity ln_income_m_profits_gross || kdensity ln_income_m_profits_net

***************
*** 2nd job ***
***************
* Hours and earnings of the second job, built the same way as for the main
* job but from the tb*s items.

*** 2nd job hours and weeks ***
gen uhrswk_2nd=tb28s 
* weeks worked: the reported count when tb29_s==1, 52 when tb29_s==2
gen numweeks_2nd=tb29_s1 if tb29_s==1
replace numweeks_2nd=52 if tb29_s==2
* same cap as numweeks and numweeks_last: a year has at most 52 weeks, so
* larger values are treated as invalid rather than inflating hoursyr_2nd
replace numweeks_2nd=. if numweeks_2nd>52
gen hoursyr_2nd=numweeks_2nd*uhrswk_2nd

*** earnings ***
* monthly: wage income plus profits (net preferred, gross as fallback)
gen income_m_2nd=tb35b_2
gen income_m_profits_gross_2nd=tb37s1_2
gen income_m_profits_net_2nd=tb37s2_2
gen income_m_profits_2nd=income_m_profits_net_2nd
replace income_m_profits_2nd=income_m_profits_gross_2nd if income_m_profits_2nd==.

* total is missing when both components are missing
egen earnings_m_2nd=rowtotal(income_m_2nd income_m_profits_2nd)
egen earnings_m_miss_2nd=rowmiss(income_m_2nd income_m_profits_2nd)
replace earnings_m_2nd=. if earnings_m_miss_2nd==2

* yearly: same construction from tb36b_2 and tb38s*
gen income_yr_2nd=tb36b_2
gen income_yr_profits_gross_2nd=tb38s1_2
gen income_yr_profits_net_2nd=tb38s2_2
gen income_yr_profits_2nd=income_yr_profits_net_2nd
replace income_yr_profits_2nd=income_yr_profits_gross_2nd if income_yr_profits_2nd==.

egen earnings_yr_2nd=rowtotal(income_yr_2nd income_yr_profits_2nd)
egen earnings_yr_miss_2nd=rowmiss(income_yr_2nd income_yr_profits_2nd)
replace earnings_yr_2nd=. if earnings_yr_miss_2nd==2


**************************************************
*** add 2nd job earnings and hours to main job ***
**************************************************
* For each measure the main-job and 2nd-job values are summed into a
* temporary <measure>_ variable (missing only when both parts are missing),
* which then replaces the main-job variable under its original name.
* The helper <measure>_miss_ variables stay in the data.
* NOTE(review): ln_earnings_m and ln_earnings_yr were computed before this
* step and therefore still refer to main-job earnings only -- confirm intent
foreach measure in earnings_m earnings_yr hoursyr {
    egen `measure'_=rowtotal(`measure' `measure'_2nd)
    egen `measure'_miss_=rowmiss(`measure' `measure'_2nd)
    replace `measure'_=. if `measure'_miss_==2
    drop `measure'
    rename `measure'_ `measure'
}


****************
*** last job ***
****************

* when the last job was held; the year is forced to 2002 when tb07 equals 1
gen lastworked_m=tb08
gen lastworked_yr=tb09_2
replace lastworked_yr=2002 if tb07==1

* flag for people who have worked but not in the reference year; not set
* when the last job dates from before 1992
gen worked_last=1 if worked_ever==1 & worked==0 & !(lastworked_yr<1992)

* characteristics of the last job
gen occ_last=tb14_16_cmo 
gen ind_last=tb14_16_scian 
gen classwk_last=tb17 
gen uhrswk_last=tb12_2

* weeks worked: the reported count when tb13_1==1, 52 when tb13_1==2,
* missing above 52
gen numweeks_last=cond(tb13_1==2, 52, tb13_2) if inlist(tb13_1,1,2)
replace numweeks_last=. if numweeks_last>52

*** monthly reports *** (annualised by multiplying by 12)
local m_items tb20a_2 tb20b_2 tb20c_2 tb20d_2 tb20e_2 tb20f_2 tb20g_2
gen income_m_last=tb20_2
egen incometot_m_last=rowtotal(`m_items')
egen incometot_m_miss_last=rowmiss(`m_items')
replace incometot_m_last=. if incometot_m_miss_last==7
foreach v in income_m_last incometot_m_last {
    replace `v'=`v'*12
}

*** yearly reports ***
local yr_items tb21h_2 tb21i_2 tb21j_2 tb21k_2 tb21l_2 tb21m_2
gen income_yr_last=tb21_2
egen incometot_yr_last=rowtotal(`yr_items')
egen incometot_yr_miss_last=rowmiss(`yr_items')
replace incometot_yr_last=. if incometot_yr_miss_last==6

*** profits reports *** (net preferred, gross as fallback, then annualised)
gen income_m_profits_gross_last=tb221_2
gen income_m_profits_net_last=tb222_2
gen income_m_profits_last=income_m_profits_net_last
replace income_m_profits_last=income_m_profits_gross_last if income_m_profits_last==.
replace income_m_profits_last=income_m_profits_last*12

*** sum earnings *** (missing only when all five components are missing)
local parts income_yr_last incometot_yr_last income_m_last incometot_m_last income_m_profits_last
egen earnings_yr_last=rowtotal(`parts')
egen earnings_yr_miss_last=rowmiss(`parts')
replace earnings_yr_last=. if earnings_yr_miss_last==5
gen earnings_m_last=earnings_yr_last/12

*** calculate log earnings ***
foreach v in earnings_yr_last earnings_m_last {
    gen ln_`v'=ln(`v')
}

************************
*** merge proxy data ***
************************
* Adds the proxy reports and uses them to fill gaps in the self reports.

sort folio ls

* with the update option _merge takes the values 1-5: matched observations
* are coded 3 (no update), 4 (missing master values updated) or 5 (master
* and using disagree)
merge folio ls using mxfls02_proxy, update

rename _merge merge_proxy

* for every variable: keep the self report as <var>_self, then fill <var>
* with <var>_proxy where the self report is missing
foreach var of varlist literate ever_attend attend grade graduate yearsch yearsch_check empstat_main worked_main worked_wk worked_yr worked_ever worked occ ind hourswk uhrswk classwk numweeks income_m incometot_m incometot_m_miss income_m_profits earnings_m earnings_m_miss ln_earnings_m income_yr incometot_yr incometot_yr_miss income_yr_profits earnings_yr earnings_yr_miss ln_earnings_yr lastworked_m lastworked_yr worked_last occ_last ind_last classwk_last uhrswk_last numweeks_last income_m_last incometot_m_last incometot_m_miss_last income_yr_last incometot_yr_last incometot_yr_miss_last income_m_profits_last earnings_yr_last earnings_yr_miss_last earnings_m_last ln_earnings_yr_last ln_earnings_m_last moved visitUS tanda canborrow buycredit saved savings hoursyr {
	gen `var'_self=`var'
	replace `var'=`var'_proxy if `var'_self==. & `var'_proxy!=.
}

***** source of information *****
* report = 1 self report (matched in the education, cognitive or credit module)
*        = 2 proxy report (matched in the proxy file, no self report)
*        = 3 neither
gen report=.
replace report=1 if merge_ed==3 | merge_iq==3 | merge_cr==3
* all three "matched" codes count as a proxy match; testing ==3 alone would
* send observations coded 4 or 5 by the update merge to category 3
replace report=2 if inlist(merge_proxy,3,4,5) & report==.
replace report=3 if report==.


*************************************
***** household characteristics *****
*************************************

**** household size *****
* number of rows sharing the same folio
gen byte ones=1
bys folio: egen hhsize=count(ones) if folio!=.

**** no. of children in household *****
* markers are 1 for children in the age band and missing otherwise, so
* count() returns the number of children per household
gen byte child=1 if age<=15
gen byte child0_5=1 if age<=5
gen byte child6_15=1 if inrange(age,6,15)

foreach marker of varlist child child0_5 child6_15 {
    local total = subinstr("`marker'", "child", "hhchilds", 1)
    bys folio: egen `total'=count(`marker') if folio!=.
}

***** migrants by household / past migrants by household *****
* <var>_hh is the household maximum, n<var>_hh the household total
foreach flag in migus visitUS {
    egen `flag'_hh=max(`flag') if folio!=., by(folio)
    egen n`flag'_hh=total(`flag') if folio!=., by(folio)
}



**********************************************************************
****** form state-level weighted migration and network patterns ******
**********************************************************************
* Weighted shares are computed as mean(x*weight)/mean(weight) within state,
* state x urban and locality, using factor_iiia for visitUS and factor_iiib
* for relativeUS.
* NOTE(review): the mean of the weight is taken over every observation with
* a weight, while mean(x*weight) only covers observations where x is
* non-missing; the ratio equals a weighted mean only when x is never
* missing for weighted observations -- confirm

* unweighted locality means
egen relativeUS_locid_=mean(relativeUS), by(locid)
* locality mean of visitUS (this used to average relativeUS, which made it
* a duplicate of relativeUS_locid_); the variable name keeps its historical
* spelling so that existing references continue to work
egen vistUS_locid_=mean(visitUS), by(locid)

* mean weights by area
egen state_iiia=mean(factor_iiia) if state!=., by(state)
egen state_urban_iiia=mean(factor_iiia) if state!=. & urban!=., by(state urban)
egen locid_iiia=mean(factor_iiia) if locid!=., by(locid)

egen state_iiib=mean(factor_iiib) if state!=., by(state)
egen state_urban_iiib=mean(factor_iiib) if state!=. & urban!=., by(state urban)
egen locid_iiib=mean(factor_iiib) if locid!=., by(locid)

* weight-scaled indicators
gen visitUS_w=visitUS*factor_iiia
gen relativeUS_w=relativeUS*factor_iiib

* visitUS shares
egen visitUS_state_iiia=mean(visitUS_w) if state!=., by(state)
egen visitUS_state_urban_iiia=mean(visitUS_w) if state!=. & urban!=., by(state urban)
* restricted on locid, the grouping variable, to match locid_iiia and
* relativeUS_locid_iiib
egen visitUS_locid_iiia=mean(visitUS_w) if locid!=., by(locid)

gen visitUS_state=visitUS_state_iiia/state_iiia
gen visitUS_state_urban=visitUS_state_urban_iiia/state_urban_iiia
gen visitUS_locid=visitUS_locid_iiia/locid_iiia

* relativeUS shares
egen relativeUS_state_iiib=mean(relativeUS_w) if state!=., by(state)
egen relativeUS_state_urban_iiib=mean(relativeUS_w) if state!=. & urban!=., by(state urban)
egen relativeUS_locid_iiib=mean(relativeUS_w) if locid!=., by(locid)

gen relativeUS_state=relativeUS_state_iiib/state_iiib
gen relativeUS_state_urban=relativeUS_state_urban_iiib/state_urban_iiib
gen relativeUS_locid=relativeUS_locid_iiib/locid_iiib

* drop intermediate variables
* NOTE(review): visitUS_locid_iiia is not in this list and stays in the
* data, unlike relativeUS_locid_iiib -- confirm whether that is intended
drop visitUS_w relativeUS_w state_iiia state_urban_iiia locid_iiia locid_iiib state_iiib state_urban_iiib visitUS_state_iiia relativeUS_state_iiib relativeUS_state_urban_iiib relativeUS_locid_iiib visitUS_state_urban_iiia


***************************************
***** fill data with roster info ******
***************************************
* Each *_comb variable starts from the self + proxy value and falls back to
* the roster value where that is missing (or, for earnings, zero).

*** define yearsch self + proxy + roster
gen yearsch_comb=yearsch
replace yearsch_comb=yearsch_roster if yearsch==.

*** define worked self + proxy + roster
gen worked_comb=worked_yr
* the fallback is keyed on worked_yr, the variable worked_comb was copied
* from; keying it on worked could leave worked_comb missing whenever the
* two differ after the proxy fill-in
replace worked_comb=worked_roster if worked_yr==.

*** define earnings_yr self + proxy + roster
gen earnings_yr_comb=earnings_yr
replace earnings_yr_comb=earnings_roster if earnings_yr==. | earnings_yr==0
gen ln_earnings_yr_comb=ln(earnings_yr_comb)

*** define earnings_m (annualised) self + proxy + roster
gen earnings_m_comb=earnings_m*12
replace earnings_m_comb=earnings_roster if earnings_m==. | earnings_m==0
gen ln_earnings_m_comb=ln(earnings_m_comb)


*********************************************************************
*********************************************************************
***************** restrict data to male adults **********************
*********************************************************************
*********************************************************************

* keep men aged 21 to 65 (inclusive); everything below runs on this sample
keep if male==1 & inrange(age,21,65)

*********************************************************************
*********************************************************************
************** end of restriction to male adults ********************
*********************************************************************
*********************************************************************


****************************************************************
*** predict earnings for those working with no/zero earnings ***
****************************************************************

* keep the reported values before anything is imputed
foreach v in earnings_m earnings_yr ln_earnings_m ln_earnings_yr {
    gen `v'_actual=`v'
}

* positive-earnings versions and zero-earnings flags
gen earnings_m_pos=earnings_m if earnings_m>0
gen earnings_yr_pos=earnings_yr if earnings_yr>0
gen earnings_m_zero=(earnings_m==0) if earnings_m!=.
gen earnings_yr_zero=(earnings_yr==0) if earnings_yr!=.

*** use self+proxy to impute wages
* For the yearly and then the monthly measure: regress reported log
* earnings on roster log earnings and controls, predict, and use the
* prediction for people who worked (worked_yr or worked_roster) but whose
* reported earnings are zero or missing.
foreach per in yr m {
    local fill (worked_yr==1 | worked_roster==1) & (earnings_`per'_actual==0 | earnings_`per'_actual==.)

    xi: reg ln_earnings_`per'_actual ln_earnings_roster i.age i.yearsch_comb married urban i.state
    predict ln_earnings_`per'_hat, xb
    replace ln_earnings_`per'=ln_earnings_`per'_hat if `fill'
    replace earnings_`per'=exp(ln_earnings_`per'_hat) if `fill'
}

/*
*** use self only to impute wages
xi: reg ln_earnings_yr_self ln_earnings_roster i.age i.yearsch_comb married urban i.state
predict ln_earnings_yr_hat, xb
replace ln_earnings_yr=ln_earnings_yr_hat if (worked_yr==1 | worked_roster==1) & (earnings_yr_self==0 | earnings_yr_self==.)
replace earnings_yr=exp(ln_earnings_yr_hat) if (worked_yr==1 | worked_roster==1) & (earnings_yr_self==0 | earnings_yr_self==.)

xi: reg ln_earnings_m_self ln_earnings_roster i.age i.yearsch_comb married urban i.state
predict ln_earnings_m_hat, xb 
replace ln_earnings_m=ln_earnings_m_hat if (worked_yr==1 | worked_roster==1) & (earnings_m_self==0 | earnings_m_self==.)
replace earnings_m=exp(ln_earnings_m_hat) if (worked_yr==1 | worked_roster==1) & (earnings_m_self==0 | earnings_m_self==.)
*/


*** calculate hourly wages ***
* monthly: earnings divided by monthly hours, (numweeks/52)*4.33*uhrswk
* yearly:  earnings divided by hoursyr
* NOTE(review): earnings_m and hoursyr include the 2nd job, whereas
* numweeks and uhrswk refer to the main job only -- confirm

gen hrwage_m=earnings_m/((numweeks/52)*4.33*uhrswk)
gen hrwage_yr=earnings_yr/(hoursyr)
gen hrwage_m_actual=earnings_m_actual/((numweeks/52)*4.33*uhrswk)
gen hrwage_yr_actual=earnings_yr_actual/(hoursyr)

*** trim hourly wages ***
* wages above 250 or below 0.1 are set to missing; for the imputed series
* the counts before and inside the bounds are displayed first
foreach w in hrwage_m hrwage_yr {
    count if `w'!=.
    count if `w'<250 & `w'>0.1
    replace `w'=. if `w'>250 | `w'<.1
}

foreach w in hrwage_yr_actual hrwage_m_actual {
    replace `w'=. if `w'>250 | `w'<.1
}

*** log hourly wages ***
foreach w in hrwage_m hrwage_yr hrwage_m_actual hrwage_yr_actual {
    gen ln_`w'=ln(`w')
}


**********************************
*** generate missing variables ***
**********************************

* flag missing values, then replace them with the sample mean
foreach v of varlist relativeUS nrelativeUS canborrow assets_h iq goodhealth married hhchilds {
    gen missing_`v'=(`v'==.)
    summarize `v'
    replace `v'=r(mean) if missing_`v'==1
}

**********************
****** cut data ******
**********************

* age in five bands starting at 21, 30, 39, 48 and 57, recoded to 0-4
egen age_cat5=cut(age), at(21,30,39,48,57,66)
recode age_cat5 (21=0) (30=1) (39=2) (48=3) (57=4)

* iq in five bands starting at 0, 3, 5, 7 and 9, recoded to 0-4
egen iq_cat5=cut(iq), at(0,3,5,7,9,13)
recode iq_cat5 (0=0) (3=1) (5=2) (7=3) (9=4)

* quantile groups: five for mental health, three for household assets
egen mentalhealth_cat5=cut(mentalhealth), group(5)
egen assets_cat=cut(assets_h), group(3)

* one dummy per asset group
forvalues g = 0/2 {
    quietly gen byte assets_cat_`g'=(assets_cat==`g') if assets_cat!=.
}

* schooling bands: 0-3, 4-6, 7-9, 10-12 and 13-20 years
foreach v in yearsch_roster yearsch_comb yearsch {
    gen `v'_cat5=`v'
}
recode yearsch_cat5 yearsch_roster_cat5 yearsch_comb_cat5 (0/3=0) (4/6=1) (7/9=2) (10/12=3) (13/20=4)

***********************
****** save data ******
***********************

quietly compress
save mxfls02_imputed, replace
